Preparations

knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file(), fig.width=16)
options(scipen=999)
library(tidyverse)
── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.1.0       ✔ purrr   0.3.1  
✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
✔ tidyr   0.8.3       ✔ stringr 1.4.0  
✔ readr   1.3.1       ✔ forcats 0.4.0  
── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Read bas files

read_basfiles <- function(bas_filepath) {
  
  bas_file <- read_tsv(bas_filepath,
                  col_types = cols_only('sample' = 'c', 
                                        'platform' = 'c',        
                                       'average_quality_of_mapped_bases' = col_guess())) 
  return( bas_file)
}

Preprocess table

bas_files <- list.files(path = "./data/bas",
                        pattern="*bam.bas",
                        full.names=TRUE)
bas_dt <- 
  map_dfr(bas_files, read_basfiles ) %>%
  rename(avg_qual="average_quality_of_mapped_bases")
bas_dt
bas_dt <-
  bas_dt %>%
  group_by(sample) %>%
  mutate(id=1:n()) %>%
  select(-platform)%>%
  spread(id,avg_qual)
bas_dt
n_not_na <- function(x) {
 sum(!is.na(x))
}
 bas_dt <- 
  bas_dt %>%
  ungroup()%>% 
    mutate( avg_qual_sd =pmap_dbl(select(., -sample), lift_vd(sd,na.rm =TRUE )),
           avg_qual_mean = rowMeans(select(., -sample),na.rm =TRUE),
           avg_qual_n= pmap_dbl(select(., -sample), lift_vd(n_not_na)),
           avg_qual_min= pmap_dbl(select(., -sample), lift_vd(min, na.rm =TRUE)),
           avg_qual_max= pmap_dbl(select(., -sample), lift_vd(max, na.rm =TRUE))) %>%
  select (sample, avg_qual_n, avg_qual_min,avg_qual_max,avg_qual_mean,avg_qual_sd, everything() ) 
ped <- 
  read_tsv("./data/metadata/20130606_g1k.ped")%>% 
  rename(sample="Individual ID")
Parsed with column specification:
cols(
  `Family ID` = col_character(),
  `Individual ID` = col_character(),
  `Paternal ID` = col_character(),
  `Maternal ID` = col_character(),
  Gender = col_double(),
  Phenotype = col_double(),
  Population = col_character(),
  Relationship = col_character(),
  Siblings = col_character(),
  `Second Order` = col_character(),
  `Third Order` = col_character(),
  `Other Comments` = col_character()
)
ped 
bas_dt <- inner_join( ped  %>% 
                       select(sample, Population),
                       bas_dt,
                     by="sample")
bas_dt
bas_filepath <- "./data/metadata/average_quality_dt.txt"
write_tsv(bas_dt, bas_filepath)

Plot

ggplot(bas_dt,
       aes(x=avg_qual_min,
           y=avg_qual_max, 
           colour=avg_qual_n )) +
  geom_point(size=2)

ggplot(bas_dt,
       aes(x=avg_qual_mean,
           y=avg_qual_sd, 
           colour=avg_qual_n )) +
  geom_point(size=2)
Warning: Removed 366 rows containing missing values (geom_point).

LS0tCnRpdGxlOiAiSEdQIGFuYWx5c2lzIgphdXRob3I6IAotIG5hbWU6IFJpY2sgRmFyb3VuaSwgCiAgYWZmaWxpYXRpb246CiAgLSAmY3J1ayBEZXBhcnRtZW50IG9mIEh1bWFuIEdlbmV0aWNzLCBNY0dpbGwgVW5pdmVyc2l0eSwgIE1vbnRyZWFsLCBDYW5hZGEKZGF0ZTogJ2ByIGZvcm1hdChTeXMuRGF0ZSgpLCAiJVktJUItJWQiKWAnCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgICBjb2RlX2ZvbGRpbmc6IHNob3cKICAgIHRvYzogeWVzCiAgICB0b2NfZmxvYXQ6IAogICAgICBjb2xsYXBzZWQ6IGZhbHNlCiAgICAgIHNtb290aF9zY3JvbGw6IGZhbHNlCi0tLQoKIyBQcmVwYXJhdGlvbnMKCmBgYHtyIHNldHVwfQprbml0cjo6b3B0c19rbml0JHNldChyb290LmRpciA9IHJwcm9qcm9vdDo6ZmluZF9yc3R1ZGlvX3Jvb3RfZmlsZSgpLCBmaWcud2lkdGg9MTYpCm9wdGlvbnMoc2NpcGVuPTk5OSkKYGBgCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmBgYAoKCiMjIFJlYWQgYmFzIGZpbGVzCgpgYGB7ciByZWFkX2Jhc30KcmVhZF9iYXNmaWxlcyA8LSBmdW5jdGlvbihiYXNfZmlsZXBhdGgpIHsKICAKICBiYXNfZmlsZSA8LSByZWFkX3RzdihiYXNfZmlsZXBhdGgsCiAgICAgICAgICAgICAgICAgIGNvbF90eXBlcyA9IGNvbHNfb25seSgnc2FtcGxlJyA9ICdjJywgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAncGxhdGZvcm0nID0gJ2MnLCAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICdhdmVyYWdlX3F1YWxpdHlfb2ZfbWFwcGVkX2Jhc2VzJyA9IGNvbF9ndWVzcygpKSkgCiAgcmV0dXJuKCBiYXNfZmlsZSkKfQpgYGAKCiMjIyBQcmVwcm9jZXNzIHRhYmxlCgoKCmBgYHtyfQpiYXNfZmlsZXMgPC0gbGlzdC5maWxlcyhwYXRoID0gIi4vZGF0YS9iYXMiLAogICAgICAgICAgICAgICAgICAgICAgICBwYXR0ZXJuPSIqYmFtLmJhcyIsCiAgICAgICAgICAgICAgICAgICAgICAgIGZ1bGwubmFtZXM9VFJVRSkKYGBgCgpgYGB7cn0KCgpiYXNfZHQgPC0gCiAgbWFwX2RmcihiYXNfZmlsZXMsIHJlYWRfYmFzZmlsZXMgKSAlPiUKICByZW5hbWUoYXZnX3F1YWw9ImF2ZXJhZ2VfcXVhbGl0eV9vZl9tYXBwZWRfYmFzZXMiKQpiYXNfZHQKYGBgCgoKYGBge3J9CmJhc19kdCA8LQogIGJhc19kdCAlPiUKICBncm91cF9ieShzYW1wbGUpICU+JQogIG11dGF0ZShpZD0xOm4oKSkgJT4lCiAgc2VsZWN0KC1wbGF0Zm9ybSklPiUKICBzcHJlYWQoaWQsYXZnX3F1YWwpCmJhc19kdApgYGAKYGBge3J9Cm5fbm90X25hIDwtIGZ1bmN0aW9uKHgpIHsKIHN1bSghaXMubmEoeCkpCn0KYGBgCgoKYGBge3J9CiBiYXNfZHQgPC0gCiAgYmFzX2R0ICU+JQogIHVuZ3JvdXAoKSU+JSAKICAgIG11dGF0ZSggYXZnX3F1YWxfc2QgPXBtYXBfZGJsKHNlbGVjdCguLCAtc2FtcGxlKSwgbGlmdF92ZChzZCxuYS5ybSA9VFJVRSApKSwKICAgICAgICAgICBhdmdfcXVhbF9tZWFuID0gcm93TWVhbnMoc2VsZWN0KC4sIC1zYW1wbGUpLG5hLnJtID1UUlVFKSwKICAgICAgICAgICBhdmdfcXVhbF9uPSBwbWFwX2RibChzZWxlY3QoLiwgLXNhbXBsZSksIGxpZnRfdmQobl9ub3RfbmEpKSwKICAgICAgICAgICBhdmdfcXVhbF9taW49IHBtYXBfZGJsKHNlbGVjdCguLCAtc2FtcGxlKSwgbGlmdF92ZChtaW4sIG5hLnJtID1UUlVFKSksCiAgICAgICAgICAgYXZnX3F1YWxfbWF4PSBwbWFwX2RibChzZWxlY3QoLiwgLXNhbXBsZSksIGxpZnRfdmQobWF4LCBuYS5ybSA9VFJVRSkpKSAlPiUKICBzZWxlY3QgKHNhbXBsZSwgYXZnX3F1YWxfbiwgYXZnX3F1YWxfbWluLGF2Z19xdWFsX21heCxhdmdfcXVhbF9tZWFuLGF2Z19xdWFsX3NkLCBldmVyeXRoaW5nKCkgKSAKYGBgCgoKCmBgYHtyfQpwZWQgPC0gCiAgcmVhZF90c3YoIi4vZGF0YS9tZXRhZGF0YS8yMDEzMDYwNl9nMWsucGVkIiklPiUgCiAgcmVuYW1lKHNhbXBsZT0iSW5kaXZpZHVhbCBJRCIpCnBlZCAKYGBgCgoKYGBge3J9CmJhc19kdCA8LSBpbm5lcl9qb2luKCBwZWQgICU+JSAKICAgICAgICAgICAgICAgICAgICAgICBzZWxlY3Qoc2FtcGxlLCBQb3B1bGF0aW9uKSwKICAgICAgICAgICAgICAgICAgICAgICBiYXNfZHQsCiAgICAgICAgICAgICAgICAgICAgIGJ5PSJzYW1wbGUiKQpiYXNfZHQKYGBgCgoKYGBge3J9CmJhc19maWxlcGF0aCA8LSAiLi9kYXRhL21ldGFkYXRhL2F2ZXJhZ2VfcXVhbGl0eV9kdC50eHQiCndyaXRlX3RzdihiYXNfZHQsIGJhc19maWxlcGF0aCkKYGBgCgpQbG90IAogCgpgYGB7ciBmaWcud2lkdGg9MjB9CmdncGxvdChiYXNfZHQsCiAgICAgICBhZXMoeD1hdmdfcXVhbF9taW4sCiAgICAgICAgICAgeT1hdmdfcXVhbF9tYXgsIAogICAgICAgICAgIGNvbG91cj1hdmdfcXVhbF9uICkpICsKICBnZW9tX3BvaW50KHNpemU9MikKIApgYGAKCgpgYGB7ciBmaWcud2lkdGg9MjB9CmdncGxvdChiYXNfZHQsCiAgICAgICBhZXMoeD1hdmdfcXVhbF9tZWFuLAogICAgICAgICAgIHk9YXZnX3F1YWxfc2QsIAogICAgICAgICAgIGNvbG91cj1hdmdfcXVhbF9uICkpICsKICBnZW9tX3BvaW50KHNpemU9MikKIApgYGAK